2  Skin: Normalization, doublet discrimination, integration, clustering

2.1 Set up Seurat workspace

# Load libraries
library(data.table)
library(devtools)
library(presto)
library(BiocParallel)
library(glmGamPoi)
library(sctransform)
library(Seurat)
library(tidyverse)
library(miQC)
library(SeuratWrappers)
library(flexmix)
library(SingleCellExperiment)
library(SummarizedExperiment)
library(readxl)
library(fishpond)
library(Matrix)
library(speckle)
library(scater)
library(patchwork)
library(vctrs)
library(alevinQC)
library(harmony)
library(scDblFinder)
library(cellXY)

# Set global options for Seurat v5 objects
options(Seurat.object.assay.version = 'v5')

2.2 Load previously saved object

merged.18279.skin <- readRDS("Skin_scRNA_Part1.rds")

2.3 Normalize and scale data

Regress out mitochondrial contribution

merged.18279.skin <- NormalizeData(merged.18279.skin)
merged.18279.skin <- FindVariableFeatures(merged.18279.skin, 
                                    assay="RNA", 
                                    layer="data", 
                                    selection.method = "vst", 
                                    nfeatures = 5000)
merged.18279.skin <- ScaleData(merged.18279.skin, vars.to.regress = "percent.mt")

2.4 Run PCA

merged.18279.skin <- RunPCA(merged.18279.skin, npcs = 200, verbose = FALSE)
ElbowPlot(merged.18279.skin, ndims = 200, reduction = "pca")

2.5 Plot unintegrated UMAP

merged.18279.skin <- RunUMAP(merged.18279.skin, 
                        reduction = "pca", 
                        dims = 1:50, 
                        reduction.name = "umap.unintegrated")
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
12:19:39 UMAP embedding parameters a = 0.9922 b = 1.112
12:19:39 Read 46891 rows and found 50 numeric columns
12:19:39 Using Annoy for neighbor search, n_neighbors = 30
12:19:39 Building Annoy index with metric = cosine, n_trees = 50
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
12:19:46 Writing NN index file to temp file /tmp/RtmpLWXF82/file2d64c0482d37f6
12:19:46 Searching Annoy index using 1 thread, search_k = 3000
12:20:03 Annoy recall = 100%
12:20:04 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
12:20:07 Initializing from normalized Laplacian + noise (using RSpectra)
12:20:10 Commencing optimization for 200 epochs, with 2042658 positive edges
12:20:30 Optimization finished
DimPlot(merged.18279.skin, reduction = "umap.unintegrated", group.by = c("Site", "Patient", "Timepoint"))

2.6 Call preliminary clusters for the purposes of doublet discrimination

merged.18279.skin <- FindNeighbors(merged.18279.skin, dims = 1:50, reduction = "pca")
Computing nearest neighbor graph
Computing SNN
merged.18279.skin <- FindClusters(merged.18279.skin, 
                             resolution = 0.4, 
                             algorithm = 2)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 46891
Number of edges: 1576348

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9313
Number of communities: 22
Elapsed time: 10 seconds
DimPlot(merged.18279.skin, reduction = "umap.unintegrated", label = T)

2.7 Identify and remove doublets

This uses raw counts as input

2.7.1 Combine RNA layers

merged.18279.skin[['RNA']] <- JoinLayers(merged.18279.skin[['RNA']])

2.7.2 Convert seurat to sce and check colData

merged.18279.skin.sce <- as.SingleCellExperiment(merged.18279.skin, assay = "RNA")
merged.18279.skin.sce
class: SingleCellExperiment 
dim: 61217 46891 
metadata(0):
assays(2): counts logcounts
rownames(61217): 5-8S-rRNA 5S-rRNA ... ZZEF1 ZZZ3
rowData names(0):
colnames(46891): P101_Skin_Pre3rd_2.5mgIpi_RNA_AAACCTGCACCATGTA
  P101_Skin_Pre3rd_2.5mgIpi_RNA_CACATAGCAGGCTCAC ...
  P111_Skin_Post3rd_5mgIpi_RNA_TGGCCACCAGGCTGAA
  P111_Skin_Post3rd_5mgIpi_RNA_TGAGCCGAGACGCGTT
colData names(15): orig.ident nCount_RNA ... seurat_clusters ident
reducedDimNames(2): PCA UMAP.UNINTEGRATED
mainExpName: RNA
altExpNames(0):
colData(merged.18279.skin.sce)
DataFrame with 46891 rows and 15 columns
                                                  orig.ident nCount_RNA
                                                 <character>  <numeric>
P101_Skin_Pre3rd_2.5mgIpi_RNA_AAACCTGCACCATGTA SeuratProject    26987.6
P101_Skin_Pre3rd_2.5mgIpi_RNA_CACATAGCAGGCTCAC SeuratProject    26321.6
P101_Skin_Pre3rd_2.5mgIpi_RNA_GCACATAAGTGCCATT SeuratProject    29916.6
P101_Skin_Pre3rd_2.5mgIpi_RNA_TTAGTTCAGCACCGTC SeuratProject    28724.6
P101_Skin_Pre3rd_2.5mgIpi_RNA_GGAACTTGTCGCGGTT SeuratProject    25775.8
...                                                      ...        ...
P111_Skin_Post3rd_5mgIpi_RNA_CAGGTGCGTCGCTTTA  SeuratProject        532
P111_Skin_Post3rd_5mgIpi_RNA_GTCACGACACATTTCT  SeuratProject        502
P111_Skin_Post3rd_5mgIpi_RNA_GAGACCTAGGGTCTCC  SeuratProject        618
P111_Skin_Post3rd_5mgIpi_RNA_TGGCCACCAGGCTGAA  SeuratProject        508
P111_Skin_Post3rd_5mgIpi_RNA_TGAGCCGAGACGCGTT  SeuratProject        511
                                               nFeature_RNA percent.mt
                                                  <integer>  <numeric>
P101_Skin_Pre3rd_2.5mgIpi_RNA_AAACCTGCACCATGTA         5591    3.39147
P101_Skin_Pre3rd_2.5mgIpi_RNA_CACATAGCAGGCTCAC         5500    3.17440
P101_Skin_Pre3rd_2.5mgIpi_RNA_GCACATAAGTGCCATT         6213    6.09781
P101_Skin_Pre3rd_2.5mgIpi_RNA_TTAGTTCAGCACCGTC         5440    4.31684
P101_Skin_Pre3rd_2.5mgIpi_RNA_GGAACTTGTCGCGGTT         4171    2.80602
...                                                     ...        ...
P111_Skin_Post3rd_5mgIpi_RNA_CAGGTGCGTCGCTTTA           423   1.879699
P111_Skin_Post3rd_5mgIpi_RNA_GTCACGACACATTTCT           484   0.796813
P111_Skin_Post3rd_5mgIpi_RNA_GAGACCTAGGGTCTCC           508   1.941748
P111_Skin_Post3rd_5mgIpi_RNA_TGGCCACCAGGCTGAA           452   1.279528
P111_Skin_Post3rd_5mgIpi_RNA_TGAGCCGAGACGCGTT           386   0.391389
                                               miQC.probability   miQC.keep
                                                      <numeric> <character>
P101_Skin_Pre3rd_2.5mgIpi_RNA_AAACCTGCACCATGTA        0.0454933        keep
P101_Skin_Pre3rd_2.5mgIpi_RNA_CACATAGCAGGCTCAC        0.0462136        keep
P101_Skin_Pre3rd_2.5mgIpi_RNA_GCACATAAGTGCCATT        0.0893537        keep
P101_Skin_Pre3rd_2.5mgIpi_RNA_TTAGTTCAGCACCGTC        0.0520042        keep
P101_Skin_Pre3rd_2.5mgIpi_RNA_GGAACTTGTCGCGGTT        0.0498276        keep
...                                                         ...         ...
P111_Skin_Post3rd_5mgIpi_RNA_CAGGTGCGTCGCTTTA         0.0148312        keep
P111_Skin_Post3rd_5mgIpi_RNA_GTCACGACACATTTCT         0.0225911        keep
P111_Skin_Post3rd_5mgIpi_RNA_GAGACCTAGGGTCTCC         0.0154750        keep
P111_Skin_Post3rd_5mgIpi_RNA_TGGCCACCAGGCTGAA         0.0180636        keep
P111_Skin_Post3rd_5mgIpi_RNA_TGAGCCGAGACGCGTT         0.0260453        keep
                                                   Patient        Site
                                               <character> <character>
P101_Skin_Pre3rd_2.5mgIpi_RNA_AAACCTGCACCATGTA        P101        Skin
P101_Skin_Pre3rd_2.5mgIpi_RNA_CACATAGCAGGCTCAC        P101        Skin
P101_Skin_Pre3rd_2.5mgIpi_RNA_GCACATAAGTGCCATT        P101        Skin
P101_Skin_Pre3rd_2.5mgIpi_RNA_TTAGTTCAGCACCGTC        P101        Skin
P101_Skin_Pre3rd_2.5mgIpi_RNA_GGAACTTGTCGCGGTT        P101        Skin
...                                                    ...         ...
P111_Skin_Post3rd_5mgIpi_RNA_CAGGTGCGTCGCTTTA         P111        Skin
P111_Skin_Post3rd_5mgIpi_RNA_GTCACGACACATTTCT         P111        Skin
P111_Skin_Post3rd_5mgIpi_RNA_GAGACCTAGGGTCTCC         P111        Skin
P111_Skin_Post3rd_5mgIpi_RNA_TGGCCACCAGGCTGAA         P111        Skin
P111_Skin_Post3rd_5mgIpi_RNA_TGAGCCGAGACGCGTT         P111        Skin
                                                 Timepoint   IpiCohort
                                               <character> <character>
P101_Skin_Pre3rd_2.5mgIpi_RNA_AAACCTGCACCATGTA      Pre3rd    2.5mgIpi
P101_Skin_Pre3rd_2.5mgIpi_RNA_CACATAGCAGGCTCAC      Pre3rd    2.5mgIpi
P101_Skin_Pre3rd_2.5mgIpi_RNA_GCACATAAGTGCCATT      Pre3rd    2.5mgIpi
P101_Skin_Pre3rd_2.5mgIpi_RNA_TTAGTTCAGCACCGTC      Pre3rd    2.5mgIpi
P101_Skin_Pre3rd_2.5mgIpi_RNA_GGAACTTGTCGCGGTT      Pre3rd    2.5mgIpi
...                                                    ...         ...
P111_Skin_Post3rd_5mgIpi_RNA_CAGGTGCGTCGCTTTA      Post3rd      5mgIpi
P111_Skin_Post3rd_5mgIpi_RNA_GTCACGACACATTTCT      Post3rd      5mgIpi
P111_Skin_Post3rd_5mgIpi_RNA_GAGACCTAGGGTCTCC      Post3rd      5mgIpi
P111_Skin_Post3rd_5mgIpi_RNA_TGGCCACCAGGCTGAA      Post3rd      5mgIpi
P111_Skin_Post3rd_5mgIpi_RNA_TGAGCCGAGACGCGTT      Post3rd      5mgIpi
                                                     Assay
                                               <character>
P101_Skin_Pre3rd_2.5mgIpi_RNA_AAACCTGCACCATGTA         RNA
P101_Skin_Pre3rd_2.5mgIpi_RNA_CACATAGCAGGCTCAC         RNA
P101_Skin_Pre3rd_2.5mgIpi_RNA_GCACATAAGTGCCATT         RNA
P101_Skin_Pre3rd_2.5mgIpi_RNA_TTAGTTCAGCACCGTC         RNA
P101_Skin_Pre3rd_2.5mgIpi_RNA_GGAACTTGTCGCGGTT         RNA
...                                                    ...
P111_Skin_Post3rd_5mgIpi_RNA_CAGGTGCGTCGCTTTA          RNA
P111_Skin_Post3rd_5mgIpi_RNA_GTCACGACACATTTCT          RNA
P111_Skin_Post3rd_5mgIpi_RNA_GAGACCTAGGGTCTCC          RNA
P111_Skin_Post3rd_5mgIpi_RNA_TGGCCACCAGGCTGAA          RNA
P111_Skin_Post3rd_5mgIpi_RNA_TGAGCCGAGACGCGTT          RNA
                                                               Sample
                                                          <character>
P101_Skin_Pre3rd_2.5mgIpi_RNA_AAACCTGCACCATGTA P101_Skin_Pre3rd_2.5..
P101_Skin_Pre3rd_2.5mgIpi_RNA_CACATAGCAGGCTCAC P101_Skin_Pre3rd_2.5..
P101_Skin_Pre3rd_2.5mgIpi_RNA_GCACATAAGTGCCATT P101_Skin_Pre3rd_2.5..
P101_Skin_Pre3rd_2.5mgIpi_RNA_TTAGTTCAGCACCGTC P101_Skin_Pre3rd_2.5..
P101_Skin_Pre3rd_2.5mgIpi_RNA_GGAACTTGTCGCGGTT P101_Skin_Pre3rd_2.5..
...                                                               ...
P111_Skin_Post3rd_5mgIpi_RNA_CAGGTGCGTCGCTTTA  P111_Skin_Post3rd_5m..
P111_Skin_Post3rd_5mgIpi_RNA_GTCACGACACATTTCT  P111_Skin_Post3rd_5m..
P111_Skin_Post3rd_5mgIpi_RNA_GAGACCTAGGGTCTCC  P111_Skin_Post3rd_5m..
P111_Skin_Post3rd_5mgIpi_RNA_TGGCCACCAGGCTGAA  P111_Skin_Post3rd_5m..
P111_Skin_Post3rd_5mgIpi_RNA_TGAGCCGAGACGCGTT  P111_Skin_Post3rd_5m..
                                               RNA_snn_res.0.4 seurat_clusters
                                                      <factor>        <factor>
P101_Skin_Pre3rd_2.5mgIpi_RNA_AAACCTGCACCATGTA              9               9 
P101_Skin_Pre3rd_2.5mgIpi_RNA_CACATAGCAGGCTCAC              13              13
P101_Skin_Pre3rd_2.5mgIpi_RNA_GCACATAAGTGCCATT              12              12
P101_Skin_Pre3rd_2.5mgIpi_RNA_TTAGTTCAGCACCGTC              1               1 
P101_Skin_Pre3rd_2.5mgIpi_RNA_GGAACTTGTCGCGGTT              3               3 
...                                                        ...             ...
P111_Skin_Post3rd_5mgIpi_RNA_CAGGTGCGTCGCTTTA               4               4 
P111_Skin_Post3rd_5mgIpi_RNA_GTCACGACACATTTCT               11              11
P111_Skin_Post3rd_5mgIpi_RNA_GAGACCTAGGGTCTCC               11              11
P111_Skin_Post3rd_5mgIpi_RNA_TGGCCACCAGGCTGAA               4               4 
P111_Skin_Post3rd_5mgIpi_RNA_TGAGCCGAGACGCGTT               4               4 
                                                  ident
                                               <factor>
P101_Skin_Pre3rd_2.5mgIpi_RNA_AAACCTGCACCATGTA       9 
P101_Skin_Pre3rd_2.5mgIpi_RNA_CACATAGCAGGCTCAC       13
P101_Skin_Pre3rd_2.5mgIpi_RNA_GCACATAAGTGCCATT       12
P101_Skin_Pre3rd_2.5mgIpi_RNA_TTAGTTCAGCACCGTC       1 
P101_Skin_Pre3rd_2.5mgIpi_RNA_GGAACTTGTCGCGGTT       3 
...                                                 ...
P111_Skin_Post3rd_5mgIpi_RNA_CAGGTGCGTCGCTTTA        4 
P111_Skin_Post3rd_5mgIpi_RNA_GTCACGACACATTTCT        11
P111_Skin_Post3rd_5mgIpi_RNA_GAGACCTAGGGTCTCC        11
P111_Skin_Post3rd_5mgIpi_RNA_TGGCCACCAGGCTGAA        4 
P111_Skin_Post3rd_5mgIpi_RNA_TGAGCCGAGACGCGTT        4 

2.7.3 Run scDblFinder

Set the dbr.sd very high to better allow thresholds to be set based on misclassification rates per sample

merged.18279.skin.sce <- scDblFinder(merged.18279.skin.sce,
                    samples = "Sample",
                    dbr.sd = 1,
                    clusters = "seurat_clusters",
                    BPPARAM = MulticoreParam(4,RNGseed=123)
                )

2.7.4 Inspect results

# Look at the classes
table(merged.18279.skin.sce$seurat_clusters, merged.18279.skin.sce$scDblFinder.class)
    
     singlet doublet
  0     8034     407
  1     5222    1273
  2     6081     343
  3     1539    1754
  4     3039      16
  5     2570     206
  6     1923     152
  7     1667     229
  8     1762      97
  9     1612     243
  10    1548      97
  11    1464      34
  12    1197     149
  13     549     295
  14     572     148
  15     518     177
  16     511     109
  17     294     158
  18     286      72
  19     183     113
  20     141      46
  21      60       1
table(merged.18279.skin.sce$Sample, merged.18279.skin.sce$scDblFinder.class)
                                
                                 singlet doublet
  P101_Skin_Post3rd_2.5mgIpi_RNA    2005     333
  P101_Skin_Pre3rd_2.5mgIpi_RNA      549      73
  P103_Skin_Post3rd_2.5mgIpi_RNA    4675     665
  P103_Skin_Pre3rd_2.5mgIpi_RNA     1053     161
  P104_Skin_Post3rd_2.5mgIpi_RNA    6422     974
  P104_Skin_Pre3rd_2.5mgIpi_RNA     3619     672
  P105_Skin_Post3rd_2.5mgIpi_RNA    3996     672
  P105_Skin_Pre3rd_2.5mgIpi_RNA     3017     324
  P106_Skin_Post3rd_2.5mgIpi_RNA    1939     275
  P106_Skin_Pre3rd_2.5mgIpi_RNA     2015     208
  P108_Skin_Post3rd_5mgIpi_RNA      3108     493
  P108_Skin_Pre3rd_5mgIpi_RNA       1148     129
  P109_Skin_Pre3rd_5mgIpi_RNA       1870     274
  P110_Skin_Post3rd_5mgIpi_RNA      1658     321
  P110_Skin_Pre3rd_5mgIpi_RNA       1381     212
  P111_Skin_Post3rd_5mgIpi_RNA      1724     225
  P111_Skin_Pre3rd_5mgIpi_RNA        593     108
# Look at the scores
summary(merged.18279.skin.sce$scDblFinder.score)
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
0.0001124 0.0241526 0.0837982 0.2201275 0.2663128 0.9999770 

2.7.5 Save doublet classifications into main Seurat object

merged.18279.skin$doublet_classification <- merged.18279.skin.sce$scDblFinder.class

2.7.6 Count singlets and doublets

table(merged.18279.skin$doublet_classification)

singlet doublet 
  40772    6119 
table(merged.18279.skin$doublet_classification, merged.18279.skin$seurat_clusters)
         
             0    1    2    3    4    5    6    7    8    9   10   11   12   13
  singlet 8034 5222 6081 1539 3039 2570 1923 1667 1762 1612 1548 1464 1197  549
  doublet  407 1273  343 1754   16  206  152  229   97  243   97   34  149  295
         
            14   15   16   17   18   19   20   21
  singlet  572  518  511  294  286  183  141   60
  doublet  148  177  109  158   72  113   46    1

2.7.7 Plot singlets/doublets in UMAP space

DimPlot(merged.18279.skin,reduction = "umap.unintegrated", group.by = "doublet_classification")

2.7.8 Subset object to remove doublets and count remaining cells

merged.18279.skin.singlets <- subset(merged.18279.skin, doublet_classification %in% c("singlet"))
dim(merged.18279.skin.singlets)
[1] 61217 40772
# Count remaining cells per initial cluster
table(merged.18279.skin.singlets$seurat_clusters)

   0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
8034 5222 6081 1539 3039 2570 1923 1667 1762 1612 1548 1464 1197  549  572  518 
  16   17   18   19   20   21 
 511  294  286  183  141   60 

2.8 Remove cells with very high nCount_RNA values, set other final QC filters

merged.18279.skin.singlets <- subset(merged.18279.skin.singlets,
                                subset = nCount_RNA < 40000 & nCount_RNA > 1500 & nFeature_RNA > 750)
dim(merged.18279.skin.singlets)
[1] 61217 33401

2.9 Re-compute PCA

Re-scale data now that it has been subset

merged.18279.skin.singlets[["RNA"]] <- split(merged.18279.skin.singlets[["RNA"]], f = merged.18279.skin.singlets$Sample)

merged.18279.skin.singlets <- FindVariableFeatures(merged.18279.skin.singlets, 
                                    assay = "RNA", 
                                    layer = "data", 
                                    selection.method = "vst", 
                                    nfeatures = 5000)
merged.18279.skin.singlets <- ScaleData(merged.18279.skin.singlets, vars.to.regress = "percent.mt")
merged.18279.skin.singlets <- RunPCA(merged.18279.skin.singlets, npcs = 200, verbose = FALSE)

2.10 Run Harmony integration

merged.18279.skin.singlets <- IntegrateLayers(merged.18279.skin.singlets, 
                                method = HarmonyIntegration, 
                                orig.reduction = "pca",
                                new.reduction = "integrated.harmony"
)
Warning: HarmonyMatrix is deprecated and will be removed in the future from the
API in the future
Warning: Warning: The parameters do_pca and npcs are deprecated. They will be ignored for this function call and please remove parameters do_pca and npcs and pass to harmony cell_embeddings directly.
This warning is displayed once per session.
Warning: Warning: The parameter tau is deprecated. It will be ignored for this function call and please remove parameter tau in future function calls. Advanced users can set value of parameter tau by using parameter .options and function harmony_options().
This warning is displayed once per session.
Warning: Warning: The parameter block.size is deprecated. It will be ignored for this function call and please remove parameter block.size in future function calls. Advanced users can set value of parameter block.size by using parameter .options and function harmony_options().
This warning is displayed once per session.
Warning: Warning: The parameter max.iter.harmony is replaced with parameter max_iter. It will be ignored for this function call and please use parameter max_iter in future function calls.
This warning is displayed once per session.
Warning: Warning: The parameter max.iter.cluster is deprecated. It will be ignored for this function call and please remove parameter max.iter.cluster in future function calls. Advanced users can set value of parameter max.iter.cluster by using parameter .options and function harmony_options().
This warning is displayed once per session.
Warning: Warning: The parameter epsilon.cluster is deprecated. It will be ignored for this function call and please remove parameter epsilon.cluster in future function calls. Advanced users can set value of parameter epsilon.cluster by using parameter .options and function harmony_options().
This warning is displayed once per session.
Warning: Warning: The parameter epsilon.harmony is deprecated. It will be ignored for this function call and please remove parameter epsilon.harmony in future function calls. If users want to control if harmony would stop early or not, use parameter early_stop. Advanced users can set value of parameter epsilon.harmony by using parameter .options and function harmony_options().
This warning is displayed once per session.
Transposing data matrix
Using automatic lambda estimation
Initializing state using k-means centroids initialization
Harmony 1/10
Harmony 2/10
Harmony 3/10
Harmony converged after 3 iterations

3 Identify clusters after integration using a range of resolution

merged.18279.skin.singlets <- FindNeighbors(merged.18279.skin.singlets, dims = 1:50, reduction = "integrated.harmony")
Computing nearest neighbor graph
Computing SNN
merged.18279.skin.singlets <- FindClusters(merged.18279.skin.singlets, 
                             resolution = seq(0.1, 2, by = 0.1), 
                             algorithm = 2)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9719
Number of communities: 13
Elapsed time: 5 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9536
Number of communities: 16
Elapsed time: 6 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9380
Number of communities: 20
Elapsed time: 5 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9275
Number of communities: 23
Elapsed time: 6 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9189
Number of communities: 23
Elapsed time: 7 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9102
Number of communities: 23
Elapsed time: 6 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9023
Number of communities: 26
Elapsed time: 6 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8948
Number of communities: 27
Elapsed time: 6 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8881
Number of communities: 27
Elapsed time: 6 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8816
Number of communities: 28
Elapsed time: 6 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8760
Number of communities: 31
Elapsed time: 6 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8707
Number of communities: 33
Elapsed time: 6 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8655
Number of communities: 35
Elapsed time: 6 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8609
Number of communities: 36
Elapsed time: 6 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8561
Number of communities: 36
Elapsed time: 7 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8514
Number of communities: 38
Elapsed time: 5 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8470
Number of communities: 38
Elapsed time: 5 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8432
Number of communities: 39
Elapsed time: 5 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8388
Number of communities: 41
Elapsed time: 5 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 33401
Number of edges: 1159866

Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8350
Number of communities: 42
Elapsed time: 5 seconds
merged.18279.skin.singlets <- RunUMAP(merged.18279.skin.singlets,
                        reduction = "integrated.harmony",
                        dims = 1:50,
                        reduction.name = "umap.harmony")
12:46:37 UMAP embedding parameters a = 0.9922 b = 1.112
12:46:37 Read 33401 rows and found 50 numeric columns
12:46:37 Using Annoy for neighbor search, n_neighbors = 30
12:46:37 Building Annoy index with metric = cosine, n_trees = 50
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
12:46:42 Writing NN index file to temp file /tmp/RtmpLWXF82/file2d64c05b8721f3
12:46:42 Searching Annoy index using 1 thread, search_k = 3000
12:46:53 Annoy recall = 100%
12:46:55 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
12:46:57 Initializing from normalized Laplacian + noise (using RSpectra)
12:47:03 Commencing optimization for 200 epochs, with 1459344 positive edges
12:47:18 Optimization finished
table(merged.18279.skin.singlets$seurat_clusters)

   0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
3511 2421 2019 1778 1651 1459 1439 1342 1276 1254 1161 1068 1060 1014  991  948 
  16   17   18   19   20   21   22   23   24   25   26   27   28   29   30   31 
 940  864  776  589  565  560  559  490  473  406  361  306  286  239  239  229 
  32   33   34   35   36   37   38   39   40   41 
 173  171  169  166  130   86   84   67   47   34 

4 Plot clusters

DimPlot(merged.18279.skin.singlets,
        reduction = "umap.harmony", 
        label = TRUE, 
        group.by = paste0("RNA_snn_res.",seq(0.1,2,0.1)))

4.1 Plot one as example

DimPlot(merged.18279.skin.singlets,
        reduction = "umap.harmony", 
        label = TRUE, 
        group.by = "RNA_snn_res.1")

4.2 Plot metadata in UMAP space

DimPlot(merged.18279.skin.singlets,reduction = "umap.harmony", group.by = "Patient")

DimPlot(merged.18279.skin.singlets,reduction = "umap.harmony", group.by = "Site")

DimPlot(merged.18279.skin.singlets,reduction = "umap.harmony", group.by = "Timepoint")

DimPlot(merged.18279.skin.singlets,reduction = "umap.harmony", group.by = "IpiCohort")

DimPlot(merged.18279.skin.singlets,reduction = "umap.harmony", group.by = "Sample") + NoLegend()

FeaturePlot(merged.18279.skin.singlets,reduction = "umap.harmony",features="nCount_RNA",order=T)

FeaturePlot(merged.18279.skin.singlets,reduction = "umap.harmony",features="nFeature_RNA",order=T)

FeaturePlot(merged.18279.skin.singlets,reduction = "umap.harmony",features="percent.mt",order=T)

4.3 Save clustered object

saveRDS(merged.18279.skin.singlets,"Skin_scRNA_Part2.rds")

4.4 Get session info

sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] cellXY_0.99.0               scDblFinder_1.14.0         
 [3] harmony_1.2.0               alevinQC_1.16.1            
 [5] vctrs_0.6.5                 patchwork_1.3.0            
 [7] scater_1.30.1               scuttle_1.12.0             
 [9] speckle_1.0.0               Matrix_1.6-4               
[11] fishpond_2.6.2              readxl_1.4.3               
[13] SingleCellExperiment_1.24.0 SummarizedExperiment_1.32.0
[15] Biobase_2.62.0              GenomicRanges_1.54.1       
[17] GenomeInfoDb_1.38.8         IRanges_2.36.0             
[19] S4Vectors_0.40.2            BiocGenerics_0.48.1        
[21] MatrixGenerics_1.14.0       matrixStats_1.4.1          
[23] flexmix_2.3-19              lattice_0.22-6             
[25] SeuratWrappers_0.3.19       miQC_1.8.0                 
[27] lubridate_1.9.3             forcats_1.0.0              
[29] stringr_1.5.1               dplyr_1.1.4                
[31] purrr_1.0.2                 readr_2.1.5                
[33] tidyr_1.3.1                 tibble_3.2.1               
[35] ggplot2_3.5.1               tidyverse_2.0.0            
[37] Seurat_5.1.0                SeuratObject_5.0.2         
[39] sp_2.1-4                    sctransform_0.4.1          
[41] glmGamPoi_1.12.2            BiocParallel_1.36.0        
[43] presto_1.0.0                Rcpp_1.0.13-1              
[45] devtools_2.4.5              usethis_3.0.0              
[47] data.table_1.16.2          

loaded via a namespace (and not attached):
  [1] fs_1.6.5                  spatstat.sparse_3.1-0    
  [3] bitops_1.0-9              httr_1.4.7               
  [5] RColorBrewer_1.1-3        profvis_0.4.0            
  [7] tools_4.3.2               utf8_1.2.4               
  [9] R6_2.5.1                  DT_0.33                  
 [11] lazyeval_0.2.2            uwot_0.2.2               
 [13] urlchecker_1.0.1          withr_3.0.2              
 [15] GGally_2.2.1              gridExtra_2.3            
 [17] progressr_0.15.1          cli_3.6.3                
 [19] spatstat.explore_3.2-6    fastDummies_1.7.3        
 [21] labeling_0.4.3            spatstat.data_3.1-4      
 [23] ggridges_0.5.6            pbapply_1.7-2            
 [25] Rsamtools_2.18.0          R.utils_2.12.3           
 [27] parallelly_1.39.0         sessioninfo_1.2.2        
 [29] limma_3.58.1              RSQLite_2.3.8            
 [31] BiocIO_1.12.0             generics_0.1.3           
 [33] gtools_3.9.5              ica_1.0-3                
 [35] spatstat.random_3.2-2     ggbeeswarm_0.7.2         
 [37] fansi_1.0.6               abind_1.4-8              
 [39] R.methodsS3_1.8.2         lifecycle_1.0.4          
 [41] yaml_2.3.10               edgeR_4.0.16             
 [43] SparseArray_1.2.2         Rtsne_0.17               
 [45] blob_1.2.4                grid_4.3.2               
 [47] dqrng_0.4.1               promises_1.3.0           
 [49] crayon_1.5.3              shinydashboard_0.7.2     
 [51] miniUI_0.1.1.1            beachmat_2.18.1          
 [53] cowplot_1.1.3             KEGGREST_1.42.0          
 [55] metapod_1.10.1            pillar_1.9.0             
 [57] knitr_1.45                rjson_0.2.23             
 [59] xgboost_1.7.8.1           future.apply_1.11.3      
 [61] codetools_0.2-20          leiden_0.4.3.1           
 [63] glue_1.8.0                remotes_2.5.0            
 [65] png_0.1-8                 spam_2.11-0              
 [67] org.Mm.eg.db_3.18.0       cellranger_1.1.0         
 [69] gtable_0.3.6              cachem_1.1.0             
 [71] xfun_0.49                 S4Arrays_1.2.0           
 [73] mime_0.12                 survival_3.7-0           
 [75] bluster_1.12.0            statmod_1.5.0            
 [77] ellipsis_0.3.2            fitdistrplus_1.2-1       
 [79] ROCR_1.0-11               nlme_3.1-166             
 [81] bit64_4.5.2               RcppAnnoy_0.0.22         
 [83] irlba_2.3.5.1             vipor_0.4.7              
 [85] KernSmooth_2.23-24        DBI_1.2.3                
 [87] colorspace_2.1-1          nnet_7.3-19              
 [89] tidyselect_1.2.1          bit_4.5.0                
 [91] compiler_4.3.2            BiocNeighbors_1.20.2     
 [93] DelayedArray_0.28.0       plotly_4.10.4            
 [95] rtracklayer_1.62.0        scales_1.3.0             
 [97] lmtest_0.9-40             digest_0.6.37            
 [99] goftest_1.2-3             spatstat.utils_3.1-1     
[101] rmarkdown_2.29            RhpcBLASctl_0.23-42      
[103] XVector_0.42.0            htmltools_0.5.8.1        
[105] pkgconfig_2.0.3           sparseMatrixStats_1.14.0 
[107] fastmap_1.2.0             rlang_1.1.4              
[109] htmlwidgets_1.6.4         shiny_1.9.1              
[111] DelayedMatrixStats_1.24.0 farver_2.1.2             
[113] zoo_1.8-12                jsonlite_1.8.9           
[115] R.oo_1.27.0               BiocSingular_1.18.0      
[117] RCurl_1.98-1.16           magrittr_2.0.3           
[119] modeltools_0.2-23         GenomeInfoDbData_1.2.11  
[121] dotCall64_1.2             munsell_0.5.1            
[123] viridis_0.6.5             reticulate_1.35.0        
[125] stringi_1.8.4             zlibbioc_1.48.2          
[127] MASS_7.3-60.0.1           org.Hs.eg.db_3.18.0      
[129] plyr_1.8.9                pkgbuild_1.4.5           
[131] ggstats_0.7.0             parallel_4.3.2           
[133] listenv_0.9.1             ggrepel_0.9.6            
[135] deldir_2.0-4              Biostrings_2.70.3        
[137] splines_4.3.2             tensor_1.5               
[139] hms_1.1.3                 locfit_1.5-9.10          
[141] igraph_2.1.1              spatstat.geom_3.2-8      
[143] RcppHNSW_0.6.0            reshape2_1.4.4           
[145] ScaledMatrix_1.10.0       pkgload_1.4.0            
[147] XML_3.99-0.17             evaluate_1.0.1           
[149] scran_1.30.2              BiocManager_1.30.25      
[151] tzdb_0.4.0                httpuv_1.6.15            
[153] RANN_2.6.2                polyclip_1.10-7          
[155] future_1.34.0             scattermore_1.2          
[157] rsvd_1.0.5                xtable_1.8-4             
[159] restfulr_0.0.15           svMisc_1.2.3             
[161] RSpectra_0.16-2           later_1.3.2              
[163] viridisLite_0.4.2         AnnotationDbi_1.64.1     
[165] GenomicAlignments_1.38.2  memoise_2.0.1            
[167] beeswarm_0.4.0            tximport_1.28.0          
[169] cluster_2.1.6             timechange_0.3.0         
[171] globals_0.16.3